import pandas as pd
pd.options.mode.chained_assignment = None # default='warn'
import requests
import numpy as np
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
import folium
import json
### Import the data
# Keep only the location, review and availability columns:
# neighbourhood_group, neighbourhood, latitude, longitude,
# number_of_reviews, last_review, availability_365
Airbnb_df = pd.read_csv("AB_NYC_2019.csv", usecols=[4, 5, 6, 7, 11, 12, 15])
# Extract the year of the last review from the full date
Airbnb_df["last_review"] = pd.DatetimeIndex(Airbnb_df["last_review"]).year
# Drop rows with missing data (the old comment said "columns", but dropna
# with default axis removes rows), rename to consistent column names,
# and reset the index
Airbnb_df.dropna(inplace=True)
Airbnb_df.rename(columns={"neighbourhood_group": "Borough", "neighbourhood": "Neighborhood", "latitude": "Latitude", "longitude": "Longitude"}, inplace=True)
Airbnb_df.reset_index(drop=True, inplace=True)
print(f"There were {len(Airbnb_df)} Airbnb locations in NYC in 2019")
Airbnb_df.head() # Show the data
There were 38843 Airbnb locations in NYC in 2019
| Borough | Neighborhood | Latitude | Longitude | number_of_reviews | last_review | availability_365 | |
|---|---|---|---|---|---|---|---|
| 0 | Brooklyn | Kensington | 40.64749 | -73.97237 | 9 | 2018.0 | 365 |
| 1 | Manhattan | Midtown | 40.75362 | -73.98377 | 45 | 2019.0 | 355 |
| 2 | Brooklyn | Clinton Hill | 40.68514 | -73.95976 | 270 | 2019.0 | 194 |
| 3 | Manhattan | East Harlem | 40.79851 | -73.94399 | 9 | 2018.0 | 0 |
| 4 | Manhattan | Murray Hill | 40.74767 | -73.97500 | 74 | 2019.0 | 129 |
# Keep only listings with an active review history:
# at least 40 reviews, the most recent of which is from 2019
active = (Airbnb_df["number_of_reviews"] >= 40) & (Airbnb_df["last_review"] == 2019)
Airbnb_df = Airbnb_df.loc[active]
# Drop listings available fewer than 30 days per year
Airbnb_df = Airbnb_df.loc[Airbnb_df["availability_365"] >= 30]
# Drop neighborhoods that have fewer than 30 remaining listings
neighborhood_counts = Airbnb_df[["Neighborhood", "Borough"]].groupby("Neighborhood").count()
neighborhood_counts.rename(columns={"Borough": "Count"}, inplace=True)
sparse_neighborhoods = neighborhood_counts.loc[neighborhood_counts["Count"] < 30]
drop_neighbors = list(sparse_neighborhoods.index)
Airbnb_df = Airbnb_df[~Airbnb_df["Neighborhood"].isin(drop_neighbors)]
Airbnb_df.reset_index(drop=True, inplace=True)
Airbnb_df.describe(include="all")
| Borough | Neighborhood | Latitude | Longitude | number_of_reviews | last_review | availability_365 | |
|---|---|---|---|---|---|---|---|
| count | 5536 | 5536 | 5536.000000 | 5536.000000 | 5536.000000 | 5536.0 | 5536.000000 |
| unique | 3 | 47 | NaN | NaN | NaN | NaN | NaN |
| top | Brooklyn | Bedford-Stuyvesant | NaN | NaN | NaN | NaN | NaN |
| freq | 2561 | 673 | NaN | NaN | NaN | NaN | NaN |
| mean | NaN | NaN | 40.727621 | -73.948223 | 105.179733 | 2019.0 | 196.262645 |
| std | NaN | NaN | 0.049338 | 0.039379 | 66.638485 | 0.0 | 102.990576 |
| min | NaN | NaN | 40.625800 | -74.021960 | 40.000000 | 2019.0 | 30.000000 |
| 25% | NaN | NaN | 40.685585 | -73.978662 | 58.000000 | 2019.0 | 95.000000 |
| 50% | NaN | NaN | 40.721125 | -73.951140 | 85.000000 | 2019.0 | 207.500000 |
| 75% | NaN | NaN | 40.762920 | -73.932240 | 132.000000 | 2019.0 | 285.000000 |
| max | NaN | NaN | 40.858670 | -73.764930 | 629.000000 | 2019.0 | 365.000000 |
print("The totla number of the dataset is {} in {} neighborhood".format(len(Airbnb_df), len(Airbnb_df["Neighborhood"].unique())))
The total number of the dataset is 5536 in 47 neighborhoods
# GeoJSON with the NYC neighborhood boundary polygons
geo_data_url = "https://data.beta.nyc/dataset/0ff93d2d-90ba-457c-9f7e-39e47bf2ac5f/resource/35dd04fb-81b3-479b-a074-a27a37888ce7/download/d085e2f8d0b54d4590b1e7d1f35594c1pediacitiesnycneighborhoods.geojson"
geo_json = pd.read_json(geo_data_url)
# Build one row per neighborhood: borough, name, polygon centroid and corners.
# NOTE: DataFrame.append was deprecated in pandas 1.4 and removed in 2.0 —
# collect plain dicts and construct the DataFrame once instead.
records = []
for feature in geo_json.features:
    coords = feature["geometry"]["coordinates"]  # nested ring list of polygon corners
    center = list(Polygon(coords[0]).centroid.coords)[0]  # centroid of the outer ring
    # The outer ring comes as a 3D array whose 3rd dimension is length 1;
    # flatten it to a list of (lon, lat) tuples
    ring = [tuple(pt) for pt in coords[0]]
    records.append({
        "Borough": feature["properties"]["borough"],
        "Neighborhood": feature["properties"]["neighborhood"],
        "Polygons": ring,
        "Latitude": center[1],
        "Longitude": center[0],
    })
poly_df = pd.DataFrame(records, columns=["Borough", "Neighborhood", "Latitude", "Longitude", "Polygons"])
# Remove Neighborhoods that are not in Airbnb_df
poly_df = poly_df[poly_df['Neighborhood'].isin(list(Airbnb_df["Neighborhood"]))]
poly_df.reset_index(drop=True, inplace=True)
poly_df.head()
| Borough | Neighborhood | Latitude | Longitude | Polygons | |
|---|---|---|---|---|---|
| 0 | Queens | Astoria | 40.765187 | -73.919746 | [(-73.90160305064738, 40.76777029715587), (-73... |
| 1 | Brooklyn | Bedford-Stuyvesant | 40.687068 | -73.938201 | [(-73.9411488595606, 40.700281153346914), (-73... |
| 2 | Brooklyn | Bushwick | 40.695749 | -73.918637 | [(-73.90582150629088, 40.694113724380834), (-7... |
| 3 | Brooklyn | Canarsie | 40.638840 | -73.899707 | [(-73.89034734693779, 40.64903360577805), (-73... |
| 4 | Brooklyn | Carroll Gardens | 40.680540 | -73.997064 | [(-73.991332, 40.685448), (-73.98913989974679,... |
def get_neighborhood(point, df):
    """Return the neighborhood in *df* whose polygon contains *point*.

    point -- shapely Point built as (longitude, latitude)
    df    -- DataFrame with a "Polygons" column (list of (lon, lat) tuples)
             and a "Neighborhood" column
    Returns None when the point falls inside no polygon.
    """
    for j in df.index:
        polygon = Polygon(df["Polygons"][j])
        if polygon.contains(point):
            # BUG FIX: read from the df parameter, not the global poly_df —
            # the function silently ignored its argument before.
            return df.Neighborhood[j]
# Re-label every listing with the neighborhood whose polygon contains it.
# The original wrote Airbnb_df["Neighborhood"][i] = ... in a loop — chained
# assignment that only "works" because the warning is silenced at the top of
# the file. Build the whole column at once instead.
Airbnb_df["Neighborhood"] = [
    get_neighborhood(Point(lon, lat), poly_df)
    for lon, lat in zip(Airbnb_df.Longitude, Airbnb_df.Latitude)
]
# Use only necessary columns from here
Airbnb_df = Airbnb_df[["Borough", "Neighborhood", "Latitude", "Longitude"]]
Airbnb_df.head()
| Borough | Neighborhood | Latitude | Longitude | |
|---|---|---|---|---|
| 0 | Manhattan | Midtown | 40.75362 | -73.98377 |
| 1 | Brooklyn | Clinton Hill | 40.68514 | -73.95976 |
| 2 | Manhattan | Hell's Kitchen | 40.76489 | -73.98493 |
| 3 | Manhattan | Chinatown | 40.71344 | -73.99037 |
| 4 | Manhattan | Hell's Kitchen | 40.76076 | -73.98867 |
# Listings per neighborhood — the input for the choropleth layers below
Airbnb_df_density = (
    Airbnb_df[["Neighborhood", "Borough"]]
    .groupby("Neighborhood", as_index=False)
    .count()
    .rename(columns={"Borough": "Count"})
)
# Color-scale bin edges at the 0/80/90/95/99/100th count percentiles
bins = list(Airbnb_df_density["Count"].quantile([0, 0.8, 0.9, 0.95, 0.99, 1]))
# Choropleth of listing counts per neighborhood, with a "Borough, Neighborhood"
# popup marker at the centroid of every neighborhood polygon
m = folium.Map(location=[40.7128, -74.0060], zoom_start=11)
folium.Choropleth(
    geo_data=geo_data_url,
    name="choropleth",
    data=Airbnb_df_density,
    columns=("Neighborhood", "Count"),
    key_on="feature.properties.neighborhood",
    fill_color="BuGn",
    fill_opacity=0.7,
    line_opacity=2,
    legend_name="Number of Airbnb locations",
    bins=bins
).add_to(m)
for lat, lon, borough, neighborhood in zip(poly_df['Latitude'], poly_df['Longitude'], poly_df['Borough'], poly_df['Neighborhood']):
    label = "{}, {}".format(borough, neighborhood)
    label = folium.Popup(label, parse_html=True)
    # FIX: dropped the stray parse_html=False keyword — parse_html is a
    # folium.Popup option, not a CircleMarker one
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(m)
m
# Number of remaining listings per borough
Airbnb_borough = (
    Airbnb_df[["Borough", "Neighborhood"]]
    .groupby("Borough", as_index=False)
    .count()
    .rename(columns={"Neighborhood": "Count"})
)
Airbnb_borough.head()
| Borough | Count | |
|---|---|---|
| 0 | Brooklyn | 2561 |
| 1 | Manhattan | 2268 |
| 2 | Queens | 707 |
# Queens has far fewer listings than Brooklyn and Manhattan — drop it
Airbnb_df = Airbnb_df.loc[~Airbnb_df["Borough"].isin(["Queens"])]
Airbnb_df.reset_index(inplace=True, drop=True)
# Recompute the per-neighborhood density for the remaining boroughs
Airbnb_df_density = (
    Airbnb_df[["Neighborhood", "Borough"]]
    .groupby("Neighborhood", as_index=False)
    .count()
    .rename(columns={"Borough": "Count"})
)
# Drop the matching polygons as well so poly_df stays in sync
poly_df = poly_df[poly_df['Neighborhood'].isin(list(Airbnb_df["Neighborhood"]))]
poly_df.reset_index(drop=True, inplace=True)
Airbnb_df
| Borough | Neighborhood | Latitude | Longitude | |
|---|---|---|---|---|
| 0 | Manhattan | Midtown | 40.75362 | -73.98377 |
| 1 | Brooklyn | Clinton Hill | 40.68514 | -73.95976 |
| 2 | Manhattan | Hell's Kitchen | 40.76489 | -73.98493 |
| 3 | Manhattan | Chinatown | 40.71344 | -73.99037 |
| 4 | Manhattan | Hell's Kitchen | 40.76076 | -73.98867 |
| ... | ... | ... | ... | ... |
| 4824 | Brooklyn | Canarsie | 40.63540 | -73.90039 |
| 4825 | Brooklyn | East Flatbush | 40.65057 | -73.92383 |
| 4826 | Brooklyn | Canarsie | 40.63502 | -73.88834 |
| 4827 | Brooklyn | South Slope | 40.66371 | -73.98760 |
| 4828 | Manhattan | Midtown | 40.74595 | -73.98439 |
4829 rows × 4 columns
# # Use this cell if you want to get neighborhoods locations using geopy library
# # Note that this can give a time out error depend on how many times you use it
# from geopy.geocoders import Nominatim
# ny_df = poly_df.drop(columns=["Polygons"])
# ny_df["Longitude"] = np.nan
# ny_df["Latitude"] = np.nan
# for i in ny_df.index:
# borough = ny_df.loc[i, "Borough"]
#     neighborhood = ny_df.loc[i,"Neighborhood"]
# geolocator = Nominatim(user_agent="ny_explorer")
# address = '{}, {}, New York City, NY'.format(neighborhood, borough)
# latitude = None
# while latitude is None:
# location = geolocator.geocode(address)
# latitude = location.latitude
# longitude = location.longitude
# ny_df.loc[i,"Latitude"] = latitude
# ny_df.loc[i,"Longitude"] = longitude
# ny_df.head()
## use this cell if you want to get neighborhoods locations from the polygons centers
ny_df = poly_df[["Neighborhood", "Latitude", "Longitude"]]
# Same choropleth as before; markers are now labelled with the neighborhood only
m = folium.Map(location=[40.7128, -74.0060], zoom_start=11)
folium.Choropleth(
    geo_data=geo_data_url,
    name="choropleth",
    data=Airbnb_df_density,
    columns=("Neighborhood", "Count"),
    key_on="feature.properties.neighborhood",
    fill_color="BuGn",
    fill_opacity=0.7,
    line_opacity=2,
    legend_name="Number of Airbnb locations",
    bins=bins
).add_to(m)
for lat, lon, neighborhood in zip(ny_df['Latitude'], ny_df['Longitude'], ny_df['Neighborhood']):
    label = neighborhood
    label = folium.Popup(label, parse_html=True)
    # FIX: dropped the stray parse_html=False keyword — parse_html belongs
    # to folium.Popup, not CircleMarker
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        color='blue',
        fill=True,
        fill_color='#3186cc',
        fill_opacity=0.7).add_to(m)
m
# Foursquare API credentials and request parameters.
# NOTE(security): credentials are hard-coded and printed below — rotate them
# and load from an environment variable or config file before sharing this file.
CLIENT_ID = 'T3R3TQXS5UYZX15AUJOZLFZDBEPIWNU00LMJHPSZLBICQAH0' # your Foursquare ID
CLIENT_SECRET = 'V4RHTWCYJPYBTOKK0R1PYGE3Y4NU2WBI5RHZVAIRDGEVWHMN' # your Foursquare Secret
ACCESS_TOKEN = 'SK4JDDL5B03EUDTZTY5TONH3M03JU4S1RDEK3FQ3WDHYD2ZJ' # your FourSquare Access Token
VERSION = '20210603' # Foursquare API version; I chose the day I created this Notebook
LIMIT = 120 # limit of number of venues returned by Foursquare API
radius = 1000 # define radius
# Typo fixed in the message: "credentails" -> "credentials"
print('Your credentials:')
print('CLIENT_ID: ' + CLIENT_ID)
print('CLIENT_SECRET:' + CLIENT_SECRET)
Your credentials: CLIENT_ID: T3R3TQXS5UYZX15AUJOZLFZDBEPIWNU00LMJHPSZLBICQAH0 CLIENT_SECRET:V4RHTWCYJPYBTOKK0R1PYGE3Y4NU2WBI5RHZVAIRDGEVWHMN
def getNearbyVenues(names, latitudes, longitudes, radius=500):
    """Query the Foursquare "explore" endpoint around each location.

    names, latitudes, longitudes -- parallel iterables describing the areas
    radius -- search radius in meters around each (lat, lng) point

    Returns a DataFrame with columns Neighborhood, Venue, Venue Category,
    Venue id — one row per venue found near each named area.
    """
    venues_list = []
    for name, lat, lng in zip(names, latitudes, longitudes):
        print(name)
        # create the API request URL
        url = 'https://api.foursquare.com/v2/venues/explore?&client_id={}&client_secret={}&v={}&ll={},{}&radius={}&limit={}'.format(
            CLIENT_ID,
            CLIENT_SECRET,
            VERSION,
            lat,
            lng,
            radius,
            LIMIT)
        # make the GET request and keep only the venue items
        items = requests.get(url).json()["response"]['groups'][0]['items']
        # keep area name plus the venue's name, first category and id
        venues_list.append([
            (name,
             item['venue']['name'],
             item['venue']['categories'][0]['name'],
             item['venue']['id'])
            for item in items
        ])
    flattened = [venue for area_venues in venues_list for venue in area_venues]
    nearby_venues = pd.DataFrame(flattened)
    nearby_venues.columns = ['Neighborhood',
                             'Venue',
                             'Venue Category',
                             'Venue id']
    return nearby_venues
# Pull venues within 1 km of every neighborhood center (one network call per area)
ny_venues = getNearbyVenues(
    names=ny_df['Neighborhood'],
    latitudes=ny_df['Latitude'],
    longitudes=ny_df['Longitude'],
    radius=1000,
)
Bedford-Stuyvesant Bushwick Canarsie Carroll Gardens Chelsea Chinatown Clinton Hill Crown Heights Cypress Hills East Flatbush East Harlem East New York East Village Flatbush Fort Greene Gowanus Gramercy Greenpoint Greenwich Village Harlem Hell's Kitchen Kips Bay Lower East Side Midtown Nolita Park Slope Prospect Heights Prospect-Lefferts Gardens SoHo South Slope Sunset Park Upper East Side Upper West Side Washington Heights West Village Williamsburg
# Quick sanity check of the fetched venues: row/column count and a preview
print(ny_venues.shape)
ny_venues.head()
(3409, 4)
| Neighborhood | Venue | Venue Category | Venue id | |
|---|---|---|---|---|
| 0 | Bedford-Stuyvesant | Saraghina | Pizza Place | 4a593de0f964a52015b91fe3 |
| 1 | Bedford-Stuyvesant | Bar Lunatico | Bar | 5490f3d2498e4e2727ce17ac |
| 2 | Bedford-Stuyvesant | Do The Right Thing Crossing | Historic Site | 4dbf2ef04b2221ec2d553767 |
| 3 | Bedford-Stuyvesant | Saraghina Bakery | Bakery | 53ff6b91498e916b5804dc9b |
| 4 | Bedford-Stuyvesant | Bar Camillo | Italian Restaurant | 5e4567fa2eafa100085e9ec3 |
print('There are {} uniques categories.'.format(len(ny_venues['Venue Category'].unique())))
There are 304 unique categories.
# One-hot encode the venue categories and put the Neighborhood column first
ny_onehot = pd.get_dummies(ny_venues["Venue Category"])
categories_names = list(ny_onehot.columns)
# Guard against a venue category literally named "Neighborhood"
if "Neighborhood" in categories_names:
    categories_names.remove("Neighborhood")
ny_onehot['Neighborhood'] = ny_venues['Neighborhood']
ny_onehot = ny_onehot[["Neighborhood"] + categories_names]
ny_onehot.head()
| Neighborhood | Accessories Store | Adult Boutique | African Restaurant | American Restaurant | Amphitheater | Antique Shop | Arepa Restaurant | Argentinian Restaurant | Art Gallery | ... | Video Store | Vietnamese Restaurant | Warehouse Store | Waterfront | Whisky Bar | Wine Bar | Wine Shop | Wings Joint | Women's Store | Yoga Studio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Bedford-Stuyvesant | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 1 | Bedford-Stuyvesant | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 2 | Bedford-Stuyvesant | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | Bedford-Stuyvesant | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 4 | Bedford-Stuyvesant | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 305 columns
# Sum the one-hot rows to get total venues per category for each neighborhood
ny_groups = ny_onehot.groupby("Neighborhood").sum().reset_index()
ny_groups.head(10)
| Neighborhood | Accessories Store | Adult Boutique | African Restaurant | American Restaurant | Amphitheater | Antique Shop | Arepa Restaurant | Argentinian Restaurant | Art Gallery | ... | Video Store | Vietnamese Restaurant | Warehouse Store | Waterfront | Whisky Bar | Wine Bar | Wine Shop | Wings Joint | Women's Store | Yoga Studio | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Bedford-Stuyvesant | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 3 | 0 | 0 | 1 |
| 1 | Bushwick | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 |
| 2 | Canarsie | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 3 | Carroll Gardens | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 3 | 3 | 0 | 0 | 3 |
| 4 | Chelsea | 0 | 0 | 0 | 2 | 0 | 1 | 0 | 0 | 13 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 3 |
| 5 | Chinatown | 0 | 0 | 0 | 2 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 4 | 2 | 0 | 1 | 0 |
| 6 | Clinton Hill | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 5 | 0 | 0 | 5 |
| 7 | Crown Heights | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 1 |
| 8 | Cypress Hills | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 9 | East Flatbush | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
10 rows × 305 columns
def return_most_common_venues(row, num_top_venues):
    """Return the names of the *num_top_venues* largest entries in *row*.

    row -- Series whose first entry is the neighborhood name and whose
           remaining entries are per-category venue counts
    """
    counts = row.iloc[1:]                       # skip the Neighborhood entry
    ranked = counts.sort_values(ascending=False)
    return ranked.index.values[:num_top_venues]
num_top_venues = 10
indicators = ['st', 'nd', 'rd']
# create columns according to number of top venues: 1st, 2nd, 3rd, then Nth
columns = ['Neighborhood']
for ind in np.arange(num_top_venues):
    try:
        columns.append('{}{} Most Common Venue'.format(ind+1, indicators[ind]))
    except IndexError:
        # FIX: narrowed the bare except — only the ordinal-suffix lookup
        # past "rd" should fall back to "th"
        columns.append('{}th Most Common Venue'.format(ind+1))
# create a new dataframe ranking each neighborhood's most common venue categories
neighborhoods_venues_sorted = pd.DataFrame(columns=columns)
neighborhoods_venues_sorted['Neighborhood'] = ny_groups['Neighborhood']
for ind in np.arange(ny_groups.shape[0]):
    neighborhoods_venues_sorted.iloc[ind, 1:] = return_most_common_venues(ny_groups.iloc[ind, :], num_top_venues)
neighborhoods_venues_sorted.head()
| Neighborhood | 1st Most Common Venue | 2nd Most Common Venue | 3rd Most Common Venue | 4th Most Common Venue | 5th Most Common Venue | 6th Most Common Venue | 7th Most Common Venue | 8th Most Common Venue | 9th Most Common Venue | 10th Most Common Venue | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Bedford-Stuyvesant | Bar | Coffee Shop | Café | Pizza Place | Chinese Restaurant | Deli / Bodega | Caribbean Restaurant | Playground | Discount Store | Juice Bar |
| 1 | Bushwick | Bar | Coffee Shop | Mexican Restaurant | Pizza Place | Bakery | Deli / Bodega | Gym | Italian Restaurant | Latin American Restaurant | Mediterranean Restaurant |
| 2 | Canarsie | Caribbean Restaurant | Grocery Store | Sandwich Place | Ice Cream Shop | Pizza Place | Donut Shop | Bus Station | Bank | Gym | Food |
| 3 | Carroll Gardens | Italian Restaurant | Coffee Shop | Bar | Bakery | Pizza Place | Yoga Studio | Cocktail Bar | Spa | Wine Shop | Wine Bar |
| 4 | Chelsea | Art Gallery | Coffee Shop | Gym / Fitness Center | Yoga Studio | Theater | Boxing Gym | Café | Bakery | Bagel Shop | Spa |
# add markers to the map: choropleth plus a per-neighborhood listing-count popup
bins = list(Airbnb_df_density["Count"].quantile([0, 0.8, 0.9, 0.95, 0.99, 1]))
m = folium.Map(location=[40.7128, -74.0060], zoom_start=11)
folium.Choropleth(
    # BUG FIX: was geo_data=url — `url` is a leftover local from another cell
    # and raises NameError in a clean run; the geojson source is geo_data_url
    geo_data=geo_data_url,
    name="choropleth",
    data=Airbnb_df_density,
    columns=("Neighborhood", "Count"),
    key_on="feature.properties.neighborhood",
    fill_color="BuGn",
    fill_opacity=0.7,
    line_opacity=2,
    # BUG FIX: legend said "Number of Shooting cases" (copy-paste from another project)
    legend_name="Number of Airbnb locations",
    bins=bins
).add_to(m)
for lat, lon, poi in zip(ny_df['Latitude'], ny_df['Longitude'], ny_df['Neighborhood']):
    count = str(Airbnb_df_density["Count"].loc[Airbnb_df_density["Neighborhood"]==poi].values[0])
    label = folium.Popup(str(poi) + ' Number of Airbnb ' + str(count), parse_html=True)
    folium.CircleMarker(
        [lat, lon],
        radius=5,
        popup=label,
        fill=True,
        fill_opacity=0.7).add_to(m)
m
# Join the listing counts with each neighborhood's top venues, busiest first
Airbnb_areas_venus = Airbnb_df_density.merge(neighborhoods_venues_sorted, on="Neighborhood")
Airbnb_areas_venus = Airbnb_areas_venus.sort_values("Count", ascending=False, ignore_index=True)
Airbnb_areas_venus.head(10)  # show the top 10 neighborhoods
| Neighborhood | Count | 1st Most Common Venue | 2nd Most Common Venue | 3rd Most Common Venue | 4th Most Common Venue | 5th Most Common Venue | 6th Most Common Venue | 7th Most Common Venue | 8th Most Common Venue | 9th Most Common Venue | 10th Most Common Venue | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Bedford-Stuyvesant | 673 | Bar | Coffee Shop | Café | Pizza Place | Chinese Restaurant | Deli / Bodega | Caribbean Restaurant | Playground | Discount Store | Juice Bar |
| 1 | Harlem | 440 | Southern / Soul Food Restaurant | Coffee Shop | Cocktail Bar | Jazz Club | Bar | Seafood Restaurant | Mexican Restaurant | French Restaurant | Café | Yoga Studio |
| 2 | Williamsburg | 433 | Bar | Coffee Shop | Pizza Place | Bakery | Italian Restaurant | Mexican Restaurant | Cocktail Bar | Café | Chinese Restaurant | Japanese Restaurant |
| 3 | Bushwick | 301 | Bar | Coffee Shop | Mexican Restaurant | Pizza Place | Bakery | Deli / Bodega | Gym | Italian Restaurant | Latin American Restaurant | Mediterranean Restaurant |
| 4 | Hell's Kitchen | 284 | Theater | Coffee Shop | Bar | Gym / Fitness Center | Gym | Wine Shop | Italian Restaurant | Thai Restaurant | Wine Bar | Gift Shop |
| 5 | Crown Heights | 223 | Caribbean Restaurant | Pizza Place | Southern / Soul Food Restaurant | Café | Bakery | Bar | Fried Chicken Joint | Bagel Shop | Discount Store | Juice Bar |
| 6 | East Village | 210 | Wine Bar | Japanese Restaurant | Bar | Italian Restaurant | Juice Bar | Dessert Shop | Coffee Shop | Ice Cream Shop | Korean Restaurant | Pizza Place |
| 7 | East Harlem | 204 | Mexican Restaurant | Bakery | Park | Pizza Place | Italian Restaurant | Café | Latin American Restaurant | Thai Restaurant | Cocktail Bar | Gym / Fitness Center |
| 8 | Upper East Side | 178 | Italian Restaurant | Coffee Shop | Sushi Restaurant | Ice Cream Shop | Gym / Fitness Center | Bar | Bakery | Dessert Shop | Thai Restaurant | Café |
| 9 | Upper West Side | 169 | Italian Restaurant | Bakery | Coffee Shop | Café | Mediterranean Restaurant | American Restaurant | Wine Bar | Gym | Bar | Park |
# Coffee-venue counts (Café + Coffee Shop) per neighborhood,
# ordered like Airbnb_areas_venus (busiest Airbnb areas first)
cafe_shops = Airbnb_areas_venus["Neighborhood"].to_frame().merge(
    ny_groups[["Neighborhood", "Café", "Coffee Shop"]], on="Neighborhood")
cafe_shops['Number of coffee shops'] = cafe_shops["Coffee Shop"] + cafe_shops['Café']
top_10_neighborhoods = cafe_shops[["Neighborhood"]].head(10).values
cafe_shops[['Neighborhood', "Number of coffee shops"]].head(10)
| Neighborhood | Number of coffee shops | |
|---|---|---|
| 0 | Bedford-Stuyvesant | 14 |
| 1 | Harlem | 7 |
| 2 | Williamsburg | 9 |
| 3 | Bushwick | 11 |
| 4 | Hell's Kitchen | 5 |
| 5 | Crown Heights | 7 |
| 6 | East Village | 5 |
| 7 | East Harlem | 5 |
| 8 | Upper East Side | 8 |
| 9 | Upper West Side | 8 |
# All Café / Coffee Shop venues inside the ten busiest Airbnb neighborhoods
in_top10 = ny_venues["Neighborhood"].isin(np.reshape(top_10_neighborhoods, (10,)))
is_cafe = ny_venues["Venue Category"].isin(["Café", "Coffee Shop"])
cafe_df = ny_venues.loc[in_top10 & is_cafe]
cafe_df.reset_index(inplace=True, drop=True)
cafe_df
| Neighborhood | Venue | Venue Category | Venue id | |
|---|---|---|---|---|
| 0 | Bedford-Stuyvesant | Brooklyn Kettle | Coffee Shop | 56bf7516cd1024b6713b3563 |
| 1 | Bedford-Stuyvesant | Little Roy Coffee Co. | Coffee Shop | 589f51079343e07629090fb6 |
| 2 | Bedford-Stuyvesant | Zaca Cafe | Café | 5af862af8a6f17002ce11661 |
| 3 | Bedford-Stuyvesant | BoHaus Coffee and Flowers | Coffee Shop | 572e0e7f498ef0f740dd8229 |
| 4 | Bedford-Stuyvesant | Brown Butter | Café | 599321fcd4cc9849c017f19c |
| ... | ... | ... | ... | ... |
| 74 | Williamsburg | Porto Rico Importing Co. | Coffee Shop | 4a567942f964a5203db51fe3 |
| 75 | Williamsburg | Pecoraro Latteria | Café | 5fd85d1c9ff06f26cb354d16 |
| 76 | Williamsburg | Think Coffee | Coffee Shop | 59a02f741755620376920796 |
| 77 | Williamsburg | Variety Coffee Roasters | Coffee Shop | 49df6421f964a520c4601fe3 |
| 78 | Williamsburg | The Flat's BK Speed Coffee | Coffee Shop | 55b02e05498ef881a3a44c6d |
79 rows × 4 columns
# Fetch likes, price tier and tips count for every cafe (one API call each)
cafe_df["likes"] = np.nan
cafe_df["price_category"] = np.nan
cafe_df["tips_count"] = np.nan
for i in cafe_df.index:
    venue_id = cafe_df["Venue id"][i]
    url = 'https://api.foursquare.com/v2/venues/{}?client_id={}&client_secret={}&oauth_token={}&v={}'.format(
        venue_id,
        CLIENT_ID,
        CLIENT_SECRET,
        ACCESS_TOKEN,
        VERSION)
    result = requests.get(url).json()['response']['venue']
    # FIX: use .loc instead of chained assignment (cafe_df[col][i] = ...),
    # which writes through a temporary and only stays quiet because the
    # chained_assignment warning is disabled at the top of the file
    cafe_df.loc[i, "likes"] = result['likes']['count']
    cafe_df.loc[i, "price_category"] = result['price']['message']
    cafe_df.loc[i, "tips_count"] = result['tips']['count']
cafe_details_df = cafe_df.drop(columns="Venue id")
cafe_details_df
| Neighborhood | Venue | Venue Category | likes | price_category | tips_count | |
|---|---|---|---|---|---|---|
| 0 | Bedford-Stuyvesant | Brooklyn Kettle | Coffee Shop | 24.0 | Cheap | 9.0 |
| 1 | Bedford-Stuyvesant | Little Roy Coffee Co. | Coffee Shop | 52.0 | Cheap | 6.0 |
| 2 | Bedford-Stuyvesant | Zaca Cafe | Café | 10.0 | Cheap | 6.0 |
| 3 | Bedford-Stuyvesant | BoHaus Coffee and Flowers | Coffee Shop | 27.0 | Cheap | 2.0 |
| 4 | Bedford-Stuyvesant | Brown Butter | Café | 30.0 | Cheap | 8.0 |
| ... | ... | ... | ... | ... | ... | ... |
| 74 | Williamsburg | Porto Rico Importing Co. | Coffee Shop | 57.0 | Cheap | 25.0 |
| 75 | Williamsburg | Pecoraro Latteria | Café | 7.0 | Cheap | 2.0 |
| 76 | Williamsburg | Think Coffee | Coffee Shop | 46.0 | Cheap | 6.0 |
| 77 | Williamsburg | Variety Coffee Roasters | Coffee Shop | 433.0 | Cheap | 138.0 |
| 78 | Williamsburg | The Flat's BK Speed Coffee | Coffee Shop | 83.0 | Cheap | 16.0 |
79 rows × 6 columns
# Aggregate likes/tips per neighborhood (kept in top-10 order) and
# normalize by the number of coffee shops in each neighborhood
totals = cafe_details_df[['Neighborhood', "likes", "tips_count"]].groupby("Neighborhood").sum()
totals = totals.reindex(np.reshape(top_10_neighborhoods, (10,)))
Results = totals.merge(cafe_shops[['Neighborhood', "Number of coffee shops"]], on="Neighborhood")
Results['likes per shop'] = Results["likes"] / Results['Number of coffee shops']
Results['Tips per shop'] = Results["tips_count"] / Results['Number of coffee shops']
Results
| Neighborhood | likes | tips_count | Number of coffee shops | likes per shop | Tips per shop | |
|---|---|---|---|---|---|---|
| 0 | Bedford-Stuyvesant | 334.0 | 83.0 | 14 | 23.857143 | 5.928571 |
| 1 | Harlem | 820.0 | 295.0 | 7 | 117.142857 | 42.142857 |
| 2 | Williamsburg | 1329.0 | 384.0 | 9 | 147.666667 | 42.666667 |
| 3 | Bushwick | 685.0 | 153.0 | 11 | 62.272727 | 13.909091 |
| 4 | Hell's Kitchen | 543.0 | 177.0 | 5 | 108.600000 | 35.400000 |
| 5 | Crown Heights | 380.0 | 93.0 | 7 | 54.285714 | 13.285714 |
| 6 | East Village | 2673.0 | 924.0 | 5 | 534.600000 | 184.800000 |
| 7 | East Harlem | 109.0 | 52.0 | 5 | 21.800000 | 10.400000 |
| 8 | Upper East Side | 1081.0 | 334.0 | 8 | 135.125000 | 41.750000 |
| 9 | Upper West Side | 1510.0 | 409.0 | 8 | 188.750000 | 51.125000 |
Results.loc[(Results['likes per shop']>100) & (Results['Tips per shop']>20)].reset_index(drop=True)
| Neighborhood | likes | tips_count | Number of coffee shops | likes per shop | Tips per shop | |
|---|---|---|---|---|---|---|
| 0 | Harlem | 820.0 | 295.0 | 7 | 117.142857 | 42.142857 |
| 1 | Williamsburg | 1329.0 | 384.0 | 9 | 147.666667 | 42.666667 |
| 2 | Hell's Kitchen | 543.0 | 177.0 | 5 | 108.600000 | 35.400000 |
| 3 | East Village | 2673.0 | 924.0 | 5 | 534.600000 | 184.800000 |
| 4 | Upper East Side | 1081.0 | 334.0 | 8 | 135.125000 | 41.750000 |
| 5 | Upper West Side | 1510.0 | 409.0 | 8 | 188.750000 | 51.125000 |
We were able to narrow down the possible locations for opening a new cafe using Airbnb data and the Foursquare API. The final decision on the location will depend on the budget, the type of cafe, and the business plan.